setwd("C:/Users/Skasiv/Dropbox/My PC (LAPTOP-L8J5JVJ8)/Desktop/DVA/R")
Customer_Acquisition=read.csv("C:/Users/Skasiv/Dropbox/My PC (LAPTOP-L8J5JVJ8)/Desktop/DVA/R/Credit Card Case Study/Customer Acqusition.csv")
Repayment=read.csv("C:/Users/Skasiv/Dropbox/My PC (LAPTOP-L8J5JVJ8)/Desktop/DVA/R/Credit Card Case Study/Repayment.csv")
Spend=read.csv("C:/Users/Skasiv/Dropbox/My PC (LAPTOP-L8J5JVJ8)/Desktop/DVA/R/Credit Card Case Study/spend.csv")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(vtree)
library(ggplot2)
library(ggrepel)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(scales)
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
merged=inner_join(inner_join(Spend,Repayment,by=c("Sl.No."="SL.No.")),Customer_Acquisition,by=c("Customer.x"="Customer"))
View(merged)
Ques1 1.a
merged$Age[merged$Age<18]=mean(Customer_Acquisition$Age)
1.b
merged$Amount.x[merged$Amount.x>merged$Limit]=(1/2)*merged$Limit[merged$Amount.x>merged$Limit]
1.c
merged$Amount.y[merged$Amount.y>merged$Limit]=merged$Limit[merged$Amount.y>merged$Limit]
Ques2
2.a
print(paste("Number of Distinct Customers are",n_distinct(Customer_Acquisition$Customer)))
## [1] "Number of Distinct Customers are 100"
2.b
distinct(data.frame(Customer_Acquisition$Segment))
## Customer_Acquisition.Segment
## 1 Self Employed
## 2 Salaried_MNC
## 3 Salaried_Pvt
## 4 Govt
## 5 Normal Salary
print(paste("There are",n_distinct(Customer_Acquisition$Segment),"Distinct Categories"))
## [1] "There are 5 Distinct Categories"
2.c Since we have changed some spend values on the basis of limit for each customer The monthly average spend by customers is calculated using merged dataset only
options(dplyr.summarise.inform=FALSE)
merged%>%group_by(Month=months(dmy(Month.x)))%>%summarise(Average_Spend=round(mean(Amount.x),2))
## # A tibble: 12 x 2
## Month Average_Spend
## <chr> <dbl>
## 1 April 135036.
## 2 August 165760.
## 3 December 117273.
## 4 February 151262.
## 5 January 147770.
## 6 July 166327.
## 7 June 182038.
## 8 March 131841.
## 9 May 151624.
## 10 November 150428.
## 11 October 131018.
## 12 September 141132.
2.d
options(dplyr.summarise.inform=FALSE)
merged%>%group_by(Month=months(dmy(Month.y)))%>%summarise(Average_Repayment=round(mean(Amount.y),2))
## # A tibble: 12 x 2
## Month Average_Repayment
## <chr> <dbl>
## 1 April 167677.
## 2 August 152817.
## 3 December 190139
## 4 February 158873.
## 5 January 163446.
## 6 July 165204.
## 7 June 114147.
## 8 March 156431.
## 9 May 186098.
## 10 November 152692.
## 11 October 170119.
## 12 September 129346.
2.e
merged%>%group_by(months(dmy(Month.x)))%>%summarise(Profit_for_bank=(((sum(Amount.y)-sum(Amount.x))*2.9)/100))
## # A tibble: 12 x 2
## `months(dmy(Month.x))` Profit_for_bank
## <chr> <dbl>
## 1 April 68084.
## 2 August 13207.
## 3 December 35309.
## 4 February 223125.
## 5 January 117541.
## 6 July -32640.
## 7 June -10368.
## 8 March 152192.
## 9 May 76720.
## 10 November 15119.
## 11 October 44343.
## 12 September 28205.
2.f
merged%>%group_by(Type)%>%summarise(Frequency=length(Type))%>%arrange(desc(Frequency))%>%head(5)
## # A tibble: 5 x 2
## Type Frequency
## <chr> <int>
## 1 PETRO 200
## 2 CAMERA 160
## 3 FOOD 160
## 4 AIR TICKET 147
## 5 TRAIN TICKET 132
merged%>%group_by(Type)%>%summarise(Frequency=length(Type))%>%arrange(desc(Frequency))%>%head(5)%>%ggplot(aes(x=Type,y=Frequency))+geom_bar(stat="identity",color="dark blue",fill="yellow")
2.g
merged%>%group_by(City)%>%summarise(Spend=sum(Amount.x))%>%arrange(desc(Spend))%>%head(1)%>%select(City)
## # A tibble: 1 x 1
## City
## <chr>
## 1 COCHIN
2.h
merged%>%group_by(Age_Group=cut(Age,seq(18,88,8)))%>%summarise(Money_Spend=sum(Amount.x))%>%arrange(desc(Money_Spend))%>%head(1)
## # A tibble: 1 x 2
## Age_Group Money_Spend
## <fct> <dbl>
## 1 (42,50] 53242391.
2.h Visual Form
merged%>%group_by(Age_Group=cut(Age,seq(18,88,8)))%>%summarise(Money_Spend=sum(Amount.x))%>%ggplot(aes(x=Age_Group,y=Money_Spend))+geom_bar(stat="identity",fill="dark blue")+scale_y_continuous(labels = label_number(suffix = " M", scale = 1e-6))
2.i
merged%>%group_by(Customer=Customer.y)%>%summarise(Repayment=sum(Amount.y))%>%arrange(desc(Repayment))%>%head(10)
## # A tibble: 10 x 2
## Customer Repayment
## <chr> <dbl>
## 1 A61 10539143.
## 2 A60 9876291.
## 3 A13 9572001.
## 4 A43 8489871.
## 5 A45 8448335.
## 6 A12 8334760.
## 7 A14 7943269.
## 8 A44 7744730.
## 9 A39 7622483.
## 10 A42 7615461.
Ques 3
merged%>%group_by(Year=year(dmy(Month.x)),City,Product)%>%summarise(Spend=sum(Amount.x))
## # A tibble: 72 x 4
## # Groups: Year, City [24]
## Year City Product Spend
## <dbl> <chr> <chr> <dbl>
## 1 2004 BANGALORE Gold 9289879.
## 2 2004 BANGALORE Platinum 1112732.
## 3 2004 BANGALORE Silver 1291853.
## 4 2004 BOMBAY Gold 6987854.
## 5 2004 BOMBAY Platinum 897265.
## 6 2004 BOMBAY Silver 532089.
## 7 2004 CALCUTTA Gold 7477141.
## 8 2004 CALCUTTA Platinum 2037690.
## 9 2004 CALCUTTA Silver 500006
## 10 2004 CHENNAI Gold 1059618.
## # ... with 62 more rows
Graphical Representation
ggplotly(merged%>%group_by(year_city=paste(Year=year(dmy(Month.x)),City),Product)%>%summarise(Spend=sum(Amount.x))%>%ggplot(aes(x=year_city,y=Spend,fill=Product))+geom_bar(stat="identity",position = "dodge")+xlab("")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+scale_y_continuous(labels = label_number(suffix = " M", scale = 1e-6)))
Ques4
4.a
merged%>%group_by(Month=months(dmy(Month.x)),City)%>%summarise(Total_Spend=sum(Amount.x))%>%ggplot(aes(x=City,y=Total_Spend,fill=Month))+geom_bar(stat="identity",position=position_dodge(width = 0.5))+scale_y_continuous(labels = label_number(suffix = " M", scale = 1e-6))+theme(axis.text.x=element_text(size=7))
4.b
merged%>%group_by(Year=year(dmy(Month.x)),Type)%>%summarise(Spend=sum(Amount.x))%>%filter(Type=="AIR TICKET")%>%ggplot(aes(x=factor(Year),y=Spend,fill=Type))+geom_bar(stat="identity",width=0.32,fill="yellow",color="dark blue")+scale_y_continuous(labels=label_number(suffix = "M",scale = 1e-6))+ggtitle("Yearly spend on Air Tickets")+guides(fill=FALSE)+xlab("Year")
4.c
merged%>%group_by(Month=months(dmy(Month.x)),Product)%>%summarise(Spend=sum(Amount.x))%>%ggplot(aes(x=Product,y=Spend,fill=Month))+geom_bar(stat="identity",position ="dodge",color="white")+scale_fill_manual(values = c("#141155","#b3b023","#d62e61","#a80000","#eab676","#2c8030","#873e23","#21130d","#A99a93","#A893a9","#93a99a","#F5d40f"))+scale_y_continuous(labels=label_number(suffix="M",scale = 1e-6))+ggtitle(" Monthly Spend For Each Product")
Ques5
## # A tibble: 211 x 5
## # Groups: Product, Month, City [84]
## Product Month City Customer Repay
## <chr> <chr> <chr> <chr> <dbl>
## 1 Gold April BANGALORE A14 812582.
## 2 Gold April BANGALORE A43 612542.
## 3 Gold April BANGALORE A1 508949.
## 4 Gold April BANGALORE A13 494392.
## 5 Gold April BANGALORE A92 459106.
## 6 Gold April BANGALORE A30 425694.
## 7 Gold April BOMBAY A62 1115760.
## 8 Gold April BOMBAY A12 601326.
## 9 Gold April BOMBAY A17 391463.
## 10 Gold April BOMBAY A91 247781
## # ... with 201 more rows